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Amendments to the Specification: 

Please delete paragraph [0007]. 

Please insert the following paragraph after paragraph [0107] before paragraph [0108] which is now 
renumbered paragraph [0109]: 

[0108] The following provides a pseudo code listing of operating system software that uses the 
present invention to recover from a processor cache memory error. 

/*==========================================================================*/

/* Definitions - These are provided to attempt to make the pseudo */
/* code easier to read and are not meant to be real */
/* definitions that can be used. */

/* Processor State Parameter is located in PSP=r18 at hand off from */
/* SAL to the OS_MCA handler. */

/* Processor State Parameter bit field definitions */ 
#define TLB_Error = ProcessorStateParameter[60]

/* SAL Record Header Error Log Definitions */

#define Record_ID_Offset = 0

#define Err_Severity_Offset = 10

#define Recoverable = 0

#define Fatal = 1

#define Corrected = 2

#define Record_Length_Offset = 12

#define Record_Header_Length = 24

/* SAL Section Header Error Log Definitions */ 

#define GUID_Offset = 0

#define Section_Length_Offset = 20

#define Processor_GUID = E429FAF1-3CB7-11D4-BCA70080C73C8881
#define Section_Header_Length = 24

/* SAL Processor Error Record Definitions */ 

#define Validation_Bit_Structure
Proc_Error_Map_Valid = bit 0
Cache_Check_Valid = bits [7:4]
TLB_Check_Valid = bits [11:8]
Bus_Check_Valid = bits [15:12]
Reg_File_Check_Valid = bits [19:16]
MS_Check_Valid = bits [23:20]

#define Error_Validation_Bit_Length = 8
#define Check_Info_Valid_Bit = bit 0
#define Target_Address_Valid_Bit = bit 3
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#define Precise_lP_Valid_Bit bit 4 

#define Check_Info_Offset = 0
#define Target_Address_Offset = 24
#define Precise_IP_Offset = 32

/* Cache Check Info Bit definitions */ 

#define PrecisePrivLevel = bits [57:56]
#define PrecisePrivLevel_Valid = bit 58

/*==================================BEGIN===================================*/

/* OS Machine Check Initialization */ 



/* Interrupt parameters by calling SAL_MC_SETPARAMS */ 
Install OS_Rendez_Interrupt_Handler 

Install OS_Rendez_Wakeup_Interrupt_Handler /* ISR clean up wrapper */
Register_Rendez_Interrupt_Type&Vector;
Register_WakeUp_Interrupt_Type&Vector;
Register_CorrectedPlatformErrorInterrupt_Vector; 
Initialize_CMC_Vector_Masking; 

/* Register OS_MCA Entry Point parameters by calling SAL_SET_VECTORS */ 

Register_OS_MCA_EntryPoint;
Register_OS_INIT_EntryPoint;



/* go to spinloop */ 
Mask_All_Interrupts; 
Call SAL_MC_RENDEZ ( ); 

/* clean-up after wakeup from exit */ 
Enable_All_Interrupts;

/* return from interruption */ 
return ; 






interrupt Handler */ 

/*==========================================================================*/



/* OS Corrected Error interrupt Handler (processor and platform) */ 
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OS_Corrected_Error_lnterrupt_Handler ( ) 
{ 

/* handler for corrected machine check intr.*/ 
/* get error log */ 
if (ProcessorCorrectedError) 

Sal_Get_State_Info(processor);

else

Sal_Get_State_Info(platform);

/* If saving of the error record is to disk or the OS event log, */ 
/* then this is core OS functionality. */ 

/* Save log of MCA */ 
Save_Error_Log();

/* now we can clear the errors */
if (ProcessorCorrectedError)

Call Sal_Clear_State_Info(processor);

else

Call Sal_Clear_State_Info(platform);

/* return from interruption */ 
return; 

/* END */ 



/*==================================BEGIN===================================*/

/* OS Core Machine Check Handler */ 

OS_MCA_Handler()
{

/* handler for uncorrected machine check event */
Save_Processor_State();

if (ErrorType != Processor TLB)
SwitchToVirtualMode();

else

StayInPhysicalMode();

/* Assuming that the OS can call SAL in physical mode to get info */ 
SAL_GET_STATE_INFO(MCA);

/* check for error */
if (ErrorType == processor)

{

if (ErrorType == processor TLB)
// cannot do much;

// reset the system and get the error record at reboot
SystemReset() or ReturnToSAL(failure);

else

ErrorCorrectedStatus = OsProcessorMca()

}

if (ErrorType == Platform)

ErrorCorrectedStatus |= OsPlatformMca();
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/* If the error is not corrected, OS may want to reboot the machine */ 
/* and can do it by returning to SAL with a failure return result. */

If (ErrorCorrectedStatus == failure)
branch = ReturnToSAL_CHECK

/* Errors are corrected, so try to wake up processors which are */
/* in Rendezvous. */

/* completed error handling */

If (ErrorCorrectedStatus == success && InRendezvous() == true)
WakeUpApplicationProcessorsFromRendezvous();

/* If saving of the error record is to disk or the OS event log, */ 
/* then this is core OS functionality. */ 

/* as a last thing */ 
Save_Error_Log();

/* This is a very important step, as this clears the error record */ 
/* and also indicates the end of machine check handling by the OS. */ 
/* SAL uses this to clear any state information it may have related */ 
/* to which processors are in the MCA and any State of earlier */ 
/* rendezvous. */ 

Call Sal_Clear_State_Info(MCA);

ReturnToSAL:

/* return from interruption */
SwitchToPhysicalMode();
Restore_Processor_State();

/* return to SAL CHECK, SAL would do a reset if OS fails to correct */
return (ErrorCorrectedStatus)



ErrorCorrected = True;

/* check if the error is corrected by PAL or SAL */

If (ErrorRecord.Severity == not corrected)

/* call sub-routine to try and correct the Platform MCA */
ErrorCorrected = Correctable_Platform_MCA(platform_error_type);

Return (ErrorCorrectedStatus);



/*===================================END====================================*/



/* OS Platform Machine Check Handler */

/*==========================================================================*/

OsPlatformMca () 



END' 
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/* OS Processor Machine Check Handler */ 
OsProcessorMca()

{



ErrorCorrected = True;

/* check if the error is corrected by Firmware */
If (ErrorRecord.Severity == not corrected)

ErrorCorrectedStatus = TryProcessorErrorCorrection();

Return (ErrorCorrectedStatus);

} 

/*===================================END====================================*/



/* Try individual Processor Error Correction */ 

/* Now the OS has the data logs, start parsing the log retrieved from */
/* SAL. The sub-routine Read_OS_Error_Log will read data from the error */
/* log copied from SAL. An offset is passed to identify the data being */
/* read and the base pointer is assumed to be known by the */
/* Read_OS_Error_Log sub-routine just to simplify the pseudo-code. */

TryProcessorErrorCorrection( ) 

{ 

/* extract appropriate fields from the record header */ 
Record_ID = Read_OS_Error_Log(Record_ID_Offset);
Severity = Read_OS_Error_Log(Err_Severity_Offset);

/* It is unlikely that the OS can write to persistent storage in */
/* physical mode. If it is possible, the OS should do so. If it is not, */
/* the SAL firmware should still have a copy of the error log stored */
/* to NVRAM that will be persistent across resets. */

if (Severity == Fatal)

SystemReset() or return(failure);
if (Severity == Corrected)

return (ErrorCorrectedStatus = True);

/* These errors may be recoverable by the OS depending on the OS */ 

/* capability and the information logged by the processor. Call the */ 

/* sub-routine, OS__MCA_Recovery_Code and on return set up a min-state */ 

/* save area to return to a context of choice. The pal_mc_resume done */ 

/* through SAL allows the OS to turn on address translations and enable */ 

/* machine check aborts to be able to handle nested MCAs. */ 

if (Severity == Recoverable) 
{ 

ErrorCorrectedStatus = OS_MCA_Recovery();
Set_Up_A_Min_State_For_OS_MCA_Recovery(my_minstate);
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} 

return (ErrorCorrectedStatus) ; 
} /* End of TryProcessorErrorCorrection Handler */

/*==========================================================================*/



/*==================================BEGIN===================================*/

/* OS MCA Recovery Code */

/*==========================================================================*/



/* At this point the OS is running with address translations enabled. */ 

/* This is needed otherwise the OS would not be able to access all of */ 

/* its data structures needed to analyze if the error is recoverable */ 

/* or not. There is a chance another MCA may come during recovery due */ 

/* to this fact, but running in physical mode for the OS is difficult */ 

/* to do. */ 



OS_MCA_Recovery ( ) 
{ 

/* Set up by default that the errors are not corrected */ 
CorrectedErrorStatus = CorrectedCacheErr = CorrectedTlbErr =
CorrectedBusErr = CorrectedRegFileErr = CorrectedUarchErr = 0;

/* Start parsing the error log */

RecordLength = Read_OS_Error_Log(Record_Length_Offset);
Section_Header_Offset = OS_Error_Log_Pointer + Record_Header_Length;

/* Find the processor error log data */
Processor_Error_Log_Found = 0;

/* traverse the error record structure to find processor section */ 
while (Processor_Error_Log_Found == 0)

{

SectionGUID = Read_OS_Error_Log(Section_Header_Offset +
GUID_Offset);

SectionLength = Read_OS_Error_Log(Section_Header_Offset +

Section_Length_Offset);

if (SectionGUID == Processor_GUID)
Processor_Error_Log_Found = 1;

Section_Body_Pointer = Section_Header_Offset +
Section_Header_Length;

Section_Header_Offset = Section_Header_Offset + SectionLength;

if (Section_Header_Offset >= RecordLength)

InternalError(); /* Expecting a processor log */

} 

/* Start parsing the processor error log. Section_Body_Pointer was set */
/* up to point to the first offset of the processor error log in the */
/* while loop above. Check the valid bits to see which part of the */
/* structure has valid info. The Read_OS_Error_Log sub-routine is */
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/* assumed to know the initial pointer and just an offset is passed. */ 
/* This was done to allow the pseudo-code to be more readable. */

Proc_Valid_Bits = Read_OS_Error_Log(Section_Body_Pointer);
Section_Body_Pointer = Section_Body_Pointer + Validation_Bit_Length;

/* Read the Processor Error Map if the valid bit is set. */
if (Proc_Valid_Bits[Proc_Error_Map_Valid] == 1)

Proc_Error_Map = Read_OS_Error_Log(Section_Body_Pointer);

/* Extract how many errors are valid in the error log and determine 
which type */ 

Cache_Check_Errs = Proc_Valid_Bits[Cache_Check_Valid];
TLB_Check_Errs = Proc_Valid_Bits[TLB_Check_Valid];
Bus_Check_Errs = Proc_Valid_Bits[Bus_Check_Valid];
Reg_File_Errs = Proc_Valid_Bits[Reg_File_Check_Valid];
Uarch_Errs = Proc_Valid_Bits[MS_Check_Valid];

/* These sub-routines will return an indication of if the error can be 

corrected by killing the affected processes. */ 
if (Cache_Check_Errs != 0)

{

/* check to see if one or multiple cache errors occurred */
if (Cache_Check_Errs == 1)
CorrectedCacheErr =

Handle_Single_Cache_Error(Section_Body_Pointer);

else

CorrectedCacheErr =

Handle_Multiple_Cache_Errors(Section_Body_Pointer)

}

if (TLB_Check_Errs != 0)
{

/* Check to see if one or multiple TLB errors occurred */
if (TLB_Check_Errs == 1)

CorrectedTlbErr = Handle_Single_TLB_Error(Section_Body_Pointer);
else

CorrectedTlbErr =

Handle_Multiple_TLB_Errors(Section_Body_Pointer)

}

if (Bus_Check_Errs != 0)

{

/* check to see if one or multiple Bus errors occurred */
if (Bus_Check_Errs == 1)
CorrectedBusErr =

Handle_Single_Bus_Error(Section_Body_Pointer);

else

CorrectedBusErr =

Handle_Multiple_Bus_Errors(Section_Body_Pointer);

}

if (Reg_File_Errs != 0)
{ 
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/* Check to see if one or multiple Register file errors occured 
*/ 

if (Reg_File_Errs == 1)

CorrectedRegFileErr =

Handle_Single_Reg_File_Error(Section_Body_Pointer);

else

CorrectedRegFileErr =

Handle_Multiple_Reg_File_Errors(Section_Body_Pointer);

} 

if (Uarch_Errs != 0)
{

/* Check to see if one or multiple uarch file errors occurred */
if (Uarch_Errs == 1)

CorrectedUarch_Err =

Handle_Single_Uarch_Error(Section_Body_Pointer);

else

CorrectedUarch_Err =

Handle_Multiple_Uarch_Errors(Section_Body_Pointer);

}

CorrectedErrorStatus = CorrectedCacheErr | CorrectedTlbErr |
CorrectedBusErr | CorrectedRegFileErr |
CorrectedUarch_Err;

return (CorrectedErrorStatus);
} /* end OS_MCA_Recovery_Code */ 

/*===================================END====================================*/



/*==================================BEGIN===================================*/

/* Single Cache Error Recovery Code */
Handle_Single_Cache_Error

{ 

/* Initialize variables to a known value */ 

Cache_Check_Info = Target_Address_Length = Precise_IP_Info = -1;
Cache_Check_Valid_Bits = Read_OS_Error_Log(Section_Body_Pointer);
Section_Body_Pointer = Section_Body_Pointer
+ Error_Validation_Bit_Length;

if (Check_Info_Valid_Bit == 1)

Cache_Check_Info = Read_OS_Error_Log(Section_Body_Pointer +
Check_Info_Offset);

if (Target_Address_Valid_Bit == 1)

Target_Address_Info = Read_OS_Error_Log(Section_Body_Pointer +
Target_Address_Offset);

if (Precise_IP_Valid_Bit == 1)

Precise_IP_Info = Read_OS_Error_Log(Section_Body_Pointer +
Precise_IP_Offset);

/* Determine if the Target Address was captured by the processor or */ 
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/* not. If it was, determine if it points to global memory! shared */ 
/* memory or if it is private. If it points to a global memory */ 
/* structure, then a system reboot is necessary. If it is shared */ 
/* or private it may be recoverable. */ 

// if no target physical address is captured, then we have to reboot 
if (Target Physical Address TarId == Not Valid)
SystemReset() or return (failure);

// target physical address is captured, check with OS if this is global address page
if (OsIsTargetAddressGlobal(TarId))

SystemReset() or return (failure) // in global page, it is bad news

/* Now we know that the target address does not point to shared */ 

/* memory. Check to see if a precise instruction pointer was captured. */

/* If it was then check to see if it is a user or kernel IP. If we */
/* have the precise IP, map to the processes and kill it, else we have */

/* to kill processes based on target address. */

// so far so good, TarId is in local page: Do we have precise IP?

if (PreciseIP == true)

{

// yes, precise IP is captured, so take this branch
if (OsIsIpInKernelSpace(IP))

{

// IP in kernel space
KernelSpaceIpFlag = 1;
if (OsIsProcessCritical(IP, 0) == true)
SystemReset();

else

{

// kill all non-critical OS processes at IP
OsKillAllProcesses(IP, 0);
return (success) ; 

} 

} 

else 
{ 

// IP is in user space

UserSpaceIpFlag = 1;

// kill all shared user processes

OsKillAllProcesses(IP, 0);

return (success) ; 

} 

} 

else 

/* We do not have precise IP, so try to map the Target physical */ 
/* address to a processes. If the target address points to shared */ 
/* data, then all sharing processes need to be killed. If the */ 
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/* target address points to a private page {global has been checked */ 
/* above) then just kill the offending process. */ 

{ 

// Try and map Target Physical Address to a process data area 
if (PreviledgeLevel == valid) // check if privilege level is valid

{

// ipl = Instruction Privilege level
if (ipl == user_level) // at user level

{

// this is user privilege level
OsKillAllProcesses(0, TarId);
return (rv);

} 

else // kernel level 
{

/* If the OS has a way to determine if the IP is in a critical part */
/* of the kernel this can determine if the kernel process can be */
/* killed or not. If the OS always puts critical kernel code in a */
/* certain IP range, this could be a way it could determine. */

// this is kernel privilege level
if (OsIsProcessCritical(0, TarId))
// OS critical process error, all bets are off...
SystemReset() or return (failure);

// good, can kill all non-critical processes using TarId
OsKillAllProcesses(0, TarId);
return (success);

} 

} 

else 

// sorry, don't have privilege level information, all bets are off...
SystemReset() or return (failure);

}

return (success);

/* END */ 
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